#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@author:hengk
@contact: hengk@foxmail.com
@datetime:2019-10-28 17:46
"""
import os
class Corpus(object):
    """In-memory collection of non-blank text lines loaded from a directory.

    Every regular file directly inside the given directory is read as UTF-8
    text when the instance is constructed. Each line is stripped of
    surrounding whitespace, blank lines are discarded, and the remaining
    lines are stored in ``corpus_``.
    """

    def __init__(self, dir):
        """Remember the corpus directory and load its contents immediately.

        Args:
            dir: Path of the directory holding the corpus text files. The
                name shadows the builtin, but it is kept unchanged so that
                callers passing it by keyword keep working.
        """
        self.dir_ = dir
        self.corpus_ = []
        self.load()

    def load(self):
        """Append the non-blank lines of every file in ``self.dir_`` to ``corpus_``.

        Files are visited in sorted name order and entries that are not
        regular files (e.g. subdirectories) are skipped. Lines are appended
        to the existing list, so calling this again adds the lines a second
        time. Progress is reported on stdout.

        Raises:
            OSError: If the directory cannot be listed or a file cannot be
                opened.
            UnicodeDecodeError: If a file is not valid UTF-8.
        """
        # os.listdir() yields entries in arbitrary order; sorting keeps the
        # index -> line mapping used by get_sample() reproducible.
        for name in sorted(os.listdir(self.dir_)):
            path = os.path.join(self.dir_, name)
            if not os.path.isfile(path):
                # A subdirectory cannot be opened as text and would abort
                # the whole load.
                continue
            print("Load corpus: %s" % name)
            with open(path, encoding='utf-8') as f:
                for line in f:
                    text = line.strip()
                    if text:
                        self.corpus_.append(text)
        print("Total lines: {}".format(len(self.corpus_)))

    def get_sample(self, img_index):
        """Return the corpus line selected by ``img_index``.

        The index is reduced modulo the number of loaded lines, so any
        integer (including values past the end or negative ones) maps onto
        a valid line.

        Args:
            img_index: Integer used to pick a line.

        Returns:
            The stripped corpus line at position ``img_index % len(corpus_)``.

        Raises:
            ZeroDivisionError: If no lines have been loaded.
        """
        index = img_index % len(self.corpus_)
        return self.corpus_[index]

